# Script for the video on goodness of fit.
#
# Walks through a chi-squared goodness-of-fit test by hand: build the
# expected counts from the null-hypothesis proportions, compare them with
# the observed counts, compute the test statistic, and then find the
# p-value and the critical value. The last step repeats the whole
# analysis with the goodfit() helper.

# First look at a test of a single proportion.
# This is from Topic 17.
#
source("../hypo_prop.R")
hypoth_test_prop(0.1, 13, 225, 0, 0.02)

# For goodness of fit we want the proportions for all
# of the possible outcomes. These are the null
# hypothesis proportions:
#
null_props <- c(0.15, 0.09, 0.1, 0.15, 0.135, 0.08, 0.14, 0.155)

# Then, knowing that we will take, or even have taken, a sample
# of size 225, find the expected values for each outcome.
sample_size <- 225
expected <- null_props * sample_size
expected

# Generate the sample. gnrnd5() is defined in the sourced file;
# presumably it is what creates L1 -- confirm against gnrnd5.R.
source("../gnrnd5.R")
gnrnd5(95632022407, 985785588)
L1
table(L1)

# So here are the observed values
# (presumably read off the table above -- they sum to 225).
observed <- c(27, 19, 14, 39, 30, 18, 39, 39)
observed

# Then we want to find the observed - expected values.
# NOTE: the name `diff` is kept from the original script. It shadows
# base::diff as a variable, but calls such as diff(x) still find the
# function because R skips non-function objects when looking up a call.
diff <- observed - expected
diff

# And we move on from there to get the squares of those
# differences.
diff_sqr <- diff^2
diff_sqr

# That magnified the values that were big differences and
# it made everything positive. Now divide each of those
# by the respective "expected" value so that the same
# differences from larger expected values carry less
# weight than do similar differences from lower expected
# values.
quotients <- diff_sqr / expected
quotients

#
# Now to find the overall "strangeness" of our observed
# values from the expected values we get the sum of
# all of those quotients.
how_strange <- sum(quotients)
how_strange

#
# Even if our true population had exactly the proportions
# given in the null hypothesis we would not expect a sample
# of 225 items to have those same proportions. Each such
# sample would have differences between the observed values
# and the expected values. Each such sample would therefore
# have a value for "how_strange". The distribution of
# those "how_strange" values will be a chi-squared distribution
# with the degrees of freedom equal to one less than the
# number of different outcomes. We have 8 possible outcomes
# so there are 7 degrees of freedom.
deg_free <- length(observed) - 1

# Therefore, we can say,
# if the null hypothesis is true then how strange is it to
# get a "how_strange" value of 7.733185 or higher?
# Use the computed statistic and degrees of freedom rather than
# retyping them, so this stays correct if the data above change.
pchisq(how_strange, deg_free, lower.tail = FALSE)

# That is not strange at all.
# Or we could find the critical value for 7 degrees of freedom
# and for a level of significance of 0.02.
qchisq(0.02, deg_free, lower.tail = FALSE)

### We can do all of this in one step with the goodfit()
### function.
source("../goodfit.R")
goodfit(1:8, null_props, observed, 0.02)